# -*- coding=utf-8 -*-
'''
抓取网络资源程序
'''

import urllib2
import cookielib
import urllib
import json
# log.info(u'====================资源下载爬虫开始运行=======================')
import db
import log
from BeautifulSoup import BeautifulSoup
import os

from setting import DOWNLOAD_BASE_PATH

# Install a global opener so every urllib2 request in this module shares one
# in-memory cookie jar -- this is what keeps the login session alive across
# the subsequent scraping requests.
cookie_support = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
urllib2.install_opener(opener)

# Browser-like User-Agent sent with scraping requests (site blocks/alters
# responses for obvious bots).
headers = {
    'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0'
}

# Number of retries attempted so far after a download timeout/error.
# Module-level counter, mutated by load_child_tree().
retry_count = 0

def login(username='zeronbo', password='naruto1314'):
    '''
    Log in to the eduyun public-resource platform.

    The session cookie is captured by the module-level cookie-aware opener,
    so all subsequent requests in this module are authenticated.

    :param username: account name (defaults preserve the original behavior)
    :param password: account password
    :returns: True on successful login, False otherwise

    NOTE(review): credentials should be moved to setting.py / environment
    variables instead of living in source control.
    '''
    # Form fields expected by the login ajax endpoint.
    postdata = urllib.urlencode({
        'a':'loginin',
        'c':'member',
        'is_ajax':'1',
        'm':'user',
        'password':password,
        'remember':'0',
        'username':username
    })
    # Send the shared browser-like headers, consistent with every other
    # request this module makes.
    req = urllib2.Request(
        url='http://s.eduyun.cn/index.php',
        data=postdata,
        headers=headers
    )
    log.info(u'登录教育公共资源平台...')
    result = urllib2.urlopen(req).read()
    json_res = json.loads(result)
    # The service signals success via msg == u'成功登录' ("login succeeded").
    if json_res['msg'] == u'成功登录':
        log.info(json_res['msg'])
        return True
    else:
        log.info(u'登录失败: %s' % json_res['msg'])
        return False


# Resource-centre home page URL (kept for reference / ad-hoc experiments).
download_indexurl = 'http://s.eduyun.cn/index.php?m=resources&c=index&a=index'

def load_tree():
    '''
    Fetch the root level of the resource-category tree and persist every
    node via db.add_tree_data(); branch nodes are expanded recursively
    through load_child_tree().
    '''
    # Ajax endpoint that serves zTree menu nodes.
    tree_url = 'http://s.eduyun.cn/index.php?m=resources&a=index&a=ztree&fid=F&rootname='
    request = urllib2.Request(
        url=tree_url,
        headers=headers,
        data=urllib.urlencode({'otherParam': 'zTreeAsyncTest'})
    )
    log.info(u'获取资源树形菜单(根目录)数据...')
    response = urllib2.urlopen(request).read()
    for node in json.loads(response):
        # is-leaf flag: 0 for branch nodes, 1 for leaves.
        leaf_flag = 0 if node['isParent'] else 1
        db.add_tree_data(node['id'], node['pId'], node['name'],
                         node['t'], node['pname'], leaf_flag)
        if node['isParent']:
            load_child_tree(node['id'], node['name'], node['pname'], node['pname'])



def load_child_tree(pkid, n, pname, parent_path):
    '''
    Recursively fetch one branch of the resource-category tree and persist
    every node via db.add_tree_data().

    :param pkid: id of the parent tree node being expanded
    :param n: display name of the parent node
    :param pname: slash-joined name path the remote service expects
    :param parent_path: slash-joined local path accumulated so far; leaves
        store parent_path + '/' + name as their download directory
    '''
    global retry_count

    tree_url = 'http://s.eduyun.cn/index.php?m=resources&a=index&a=ztree&fid=F&rootname='
    tree_data = urllib.urlencode({
                                  'id':pkid,
                                  'n':n,
                                  'otherParam':'zTreeAsyncTest',
                                  'pname':pname
                                  })
    req_tree = urllib2.Request(
                               url=tree_url,
                               headers=headers,
                               data=tree_data
                               )
    try:
        child_tree = urllib2.urlopen(req_tree, timeout=15).read()
        json_child_tree = json.loads(child_tree)
        # Successful fetch: clear the retry counter for this record.
        retry_count = 0
        for t in json_child_tree:
            if t['isParent']:
                db.add_tree_data(t['id'], t['pId'], t['name'], t['t'], t['pname'], 0)
                load_child_tree(t['id'], t['name'], t['pname'], parent_path + '/' + t['name'])
            else:
                # Leaf node: store the full local path for later download.
                db.add_tree_data(t['id'], t['pId'], t['name'], parent_path + '/' + t['name'], t['pname'], 1)
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
    # abort the crawl instead of being counted as download failures.
    except Exception:
        retry_count = retry_count + 1
        log.error(u'下载出错[%s,%s,%s],重试:%d' % (pkid, n, pname, retry_count))
        if retry_count <= 3:
            load_child_tree(pkid, n, pname, parent_path)
        else:
            log.info(u'已重试%d次，放弃下载当前记录，进行下一条！' % retry_count)
            # Bug fix: reset the counter after giving up; previously it stayed
            # at 4, so every later record was abandoned on its first error.
            retry_count = 0
# print(root_tree)
# print('小学语文'.decode('utf-8'))

# down_url = 'http://s.eduyun.cn/index.php?m=resources&c=manage&a=ajaxDownload&res_id=516e7db2e4b06dd2ca3fce52'
# req = urllib2.Request(
#                       url=download_indexurl,
#                       headers=headers
#                       )
# content = urllib2.urlopen(req).read()
# soup = BeautifulSoup(content)
# print(soup.prettify())

# urllib.urlretrieve(req, '/home/zero/file/abc.doc')


# id    J01010101
# n    一年级上册
# otherParam    zTreeAsyncTest
# pname    小学语文/人教新课标版/一年级上册

def get_res_item():
    '''
    Scrape one resource-list page (hard-coded to
    小学语文/人教新课标版/一年级上册/1.a o e, page 1) and print the resource
    names, file types, each item's download response, and the record count.
    '''
    base_url = 'http://s.eduyun.cn/index.php'
    req_url = url_create(base_url, '小学语文/人教新课标版/一年级上册/1.a o e', 1)

    page_html = urllib2.urlopen(req_url, timeout=15).read()
    soup = BeautifulSoup(page_html)

    print('资源名：')
    # Resource titles live in the anchor inside each title span.
    src_name = [span.find('a').text
                for span in soup.findAll('span', {'class':'dttitle fl'})]

    print('=====================')
    print('文件类型：')
    # Drop the 3-character label prefix in front of the actual type text.
    file_type = [span.text[3:]
                 for span in soup.findAll('span', {'class':'dtform'})]

    print('==========================')
    print('下载链接：')
    for span in soup.findAll('span', {'class':'dtdown'}):
        # The anchor's id attribute is the resource id used by the
        # ajaxDownload endpoint.
        res_id = span.find('a')['id']
        download_url = 'http://s.eduyun.cn/index.php?m=resources&c=manage&a=ajaxDownload&res_id=' + res_id
        print('下载请求URL:%s' % download_url)
        request = urllib2.Request(
                      url=download_url,
                      headers=headers
                      )
        print(urllib2.urlopen(request).read())
        print('==================================================')

    print('===============================================')
    # Total record count, e.g. "共检索到资源N条" -> slice out the number.
    str_count = soup.findAll('p', {'class': 'yxf_num'})[0].text
    print(str_count[7:len(str_count) - 1])

def url_create(url, textbook_list, page):
    url_data = urllib.urlencode({
                                  'm':'resources',
                                  'a':'load_res_list',
                                  'textbook_list':textbook_list,
                                  'page':page
                                  })
    return url + '?' + url_data

def test():
    '''
    Ad-hoc scratch function: demonstrates escaping single quotes by
    doubling them (SQL-literal style). Prints the sample string before
    and after escaping.
    '''
    # Renamed from `str`, which shadowed the builtin.
    sample = 'abc\'sd\'ds\'df'
    print(sample)
    # Double each single quote, e.g. for embedding in a SQL string literal.
    print(sample.replace('\'', '\'\''))


def src_mkdirs(path):
    '''
    Create the local directory tree where resources under *path* will be
    stored. The path is sanitized via path_format() and rooted at
    DOWNLOAD_BASE_PATH; an already-existing directory is only logged.
    '''
    sanitized = path_format(path)
    target_dir = DOWNLOAD_BASE_PATH + sanitized
    log.info('创建资源目录:%s' % target_dir)
    if os.path.exists(target_dir):
        log.warn('资源目录已存在:%s' % target_dir)
    else:
        os.makedirs(target_dir)

def path_format(path):
    '''
    Sanitize a resource path so each segment is a legal directory name:
    characters that are invalid or troublesome on common filesystems are
    mapped to safe stand-ins, and spaces are stripped.
    '''
    # (original, replacement) pairs; no replacement re-introduces a key,
    # so the order is irrelevant.
    replacements = (
        ('*', '&'),
        ('<', '('),
        ('>', ')'),
        (' ', ''),
        ('?', '!'),
        ('"', '”'),
        (':', '：'),
        ('|', '1'),
    )
    for old, new in replacements:
        path = path.replace(old, new)
    return path
    
if '__main__' == __name__:
    # Ensure the DB tables exist before any scraping writes to them.
    db.add_table()
#     num_count = 0;
#     for d in db.find_child_tree_isread():
#         src_mkdirs(d[3])
    # Authenticate first so the cookie jar carries the session for scraping.
    login()
#     load_tree()
#     test()
#     db.query_tree()
    get_res_item()

# log.info(u'====================资源下载爬虫停止运行=======================')    
